Prepare Data
params_aq <- read.csv("../aeroqual/2021-06/pse_calvals_2021-06.csv")
param_paths_pse_all_O3 <- list.files('results', 'OZONE*', full.names = T)
param_path_pse_current_O3 <- param_paths_pse_all_O3[length(param_paths_pse_all_O3)]
params_pse_O3 <- read.csv(param_path_pse_current_O3)
param_paths_pse_all_NO2 <- list.files('results', 'NO2*', full.names = T)
param_path_pse_current_NO2 <- param_paths_pse_all_NO2[length(param_paths_pse_all_NO2)]
params_pse_NO2 <- read.csv(param_path_pse_current_NO2)
raw_data <- read.csv('raw_data/raw_60_min.csv') %>%
filter(ID %in% c('AQY BB-633', 'AQY BB-642'))
proxy_O3 <- get_current_proxy_data('OZONE')
## [1] "Reading existing proxy data from results/proxy/OZONE_proxy_data.csv"
## [1] "Requesting data starting with 2021-07-01 11:00:00"
## [1] "Proxy data for OZONE now current - writing to results/proxy"
proxy_NO2 <- get_current_proxy_data('NO2')
## [1] "Reading existing proxy data from results/proxy/NO2_proxy_data.csv"
## [1] "Requesting data starting with 2021-07-01 10:00:00"
## [1] "Requesting proxy data from Air District for NO2 from 2021-07-01 10 to 2021-07-01 11"
## [1] "Proxy data for NO2 now current - writing to results/proxy"
O3_data_aq <- inner_join(params_aq, raw_data, by = 'ID') %>%
filter(ID %in% c('AQY BB-633', 'AQY BB-642')) %>%
filter(Time >= deployment_date & Time >= start_date & Time <= end_date & Time < 2021) %>%
transmute(ID=ID, TIME=Time, O3.RAW = O3_raw,
O3.AQ = round(O3.gain*(O3_raw - O3.offset), 2),
O3.GAIN.AQ=O3.gain, O3.OFFSET.AQ=O3.offset)
O3_data_pse <- inner_join(params_pse_O3, raw_data, by = 'ID') %>%
filter(ID %in% c('AQY BB-633', 'AQY BB-642')) %>%
filter(Time >= deployment_date & Time >= start_date & Time <= end_date & Time < 2021) %>%
transmute(ID=ID, TIME=Time,
O3.PSE = round(O3.offset + O3.gain*O3_raw, 2),
O3.GAIN.PSE=O3.gain, O3.OFFSET.PSE=O3.offset)
O3_data <- inner_join(O3_data_aq, O3_data_pse, by = c('ID', 'TIME')) %>%
inner_join(., proxy_O3, by = c('TIME'='timestamp_pacific')) %>%
mutate(TIMESTAMP = ymd_hms(TIME), O3.PROXY = round(proxy_rand, 2)) %>%
dplyr::select(ID, TIME, TIMESTAMP, O3.RAW, O3.PROXY, O3.AQ, O3.PSE, O3.GAIN.AQ, O3.GAIN.PSE, O3.OFFSET.AQ, O3.OFFSET.PSE) %>%
filter(O3.GAIN.PSE != 1)
NO2_data_aq <- inner_join(params_aq, raw_data, by = 'ID') %>%
filter(ID %in% c('AQY BB-633', 'AQY BB-642')) %>%
filter(Time >= deployment_date & Time >= start_date & Time <= end_date & Time < 2021) %>%
mutate(O3.AQ = O3.gain*(O3_raw - O3.offset), OX.AQ = Ox.gain*(Ox_raw - Ox.offset), NO2.INT = OX.AQ-NO2.a.value*O3.AQ) %>%
transmute(ID=ID, TIME=Time, NO2.GAIN.AQ=NO2.gain, NO2.OFFSET.AQ=NO2.offset, NO2.AQ = round(NO2.gain*(NO2.INT - NO2.offset), 2))
NO2_data_pse <- inner_join(params_pse_O3, raw_data, by = 'ID') %>%
filter(ID %in% c('AQY BB-633', 'AQY BB-642')) %>%
filter(Time >= deployment_date & Time >= start_date & Time <= end_date & Time < 2021) %>%
transmute(ID=ID, TIME=Time, NO2.RAW = NO2_raw, Ox.RAW = Ox_raw, O3.PSE = round(O3.offset + O3.gain*O3_raw, 2)) %>%
inner_join(params_pse_NO2, by = 'ID') %>%
filter(TIME >= deployment_date & TIME >= start_date & TIME <= end_date & TIME < 2021) %>%
transmute(ID=ID, TIME=TIME, NO2.RAW=NO2.RAW,
NO2.PSE = round(NO2.offset + NO2.gain.Ox*Ox.RAW - NO2.gain.O3*O3.PSE, 2),
NO2.GAIN.PSE=NO2.gain.Ox, NO2.OFFSET.PSE=NO2.offset)
NO2_data <- inner_join(NO2_data_pse, proxy_NO2, by = c('TIME'='timestamp_pacific')) %>%
inner_join(NO2_data_aq, by = c('ID', 'TIME')) %>%
mutate(TIMESTAMP = ymd_hms(TIME), NO2.PROXY = round(proxy_rand, 2)) %>%
dplyr::select(ID, TIME, TIMESTAMP, NO2.RAW, NO2.PROXY, NO2.PSE, NO2.AQ, NO2.GAIN.PSE, NO2.GAIN.AQ, NO2.OFFSET.PSE, NO2.OFFSET.AQ) %>%
filter(NO2.GAIN.PSE != 1)
Write Functions to Examine Performance
get_timeseries <- function(input_data, aqy_id, month.i, pollutant){
timeseries_plot <- ggplot(input_data) +
geom_line(aes(x = TIMESTAMP, y = POL.RAW, color = 'raw')) +
geom_line(aes(x = TIMESTAMP, y = POL.PROXY, color = 'proxy')) +
geom_line(aes(x = TIMESTAMP, y = POL.AQ, color = 'aeroqual')) +
geom_line(aes(x = TIMESTAMP, y = POL.PSE, color = 'pse')) +
labs(title = paste(pollutant, 'Timeseries', '-', aqy_id, 'Month of', month.i),
x = 'Timestamp', y = paste('Pollutant Concentration (ppb)')) +
theme(plot.title = element_text(hjust = .5))
print(timeseries_plot)
}
get_error_timeseries <- function(input_data, aqy_id, month.i, pollutant){
error_timeseries <- input_data %>%
mutate(AE.RAW = POL.RAW - POL.PROXY, AE.AQ = POL.AQ - POL.PROXY, AE.PSE = POL.PSE - POL.PROXY) %>%
ggplot() +
geom_line(aes(x = TIMESTAMP, y = AE.RAW, color = 'raw')) +
geom_line(aes(x = TIMESTAMP, y = AE.AQ, color = 'aeroqual')) +
geom_line(aes(x = TIMESTAMP, y = AE.PSE, color = 'pse')) +
labs(title = paste('Absolute Error for', pollutant, '-', aqy_id, 'month of', month.i),
x = 'Timestamp', y = 'Absolute Error (ppb)') +
theme(plot.title = element_text(hjust = .5))
print(error_timeseries)
}
get_scatterplot <- function(input_data, aqy_id, month.i, pollutant){
scatterplot <- ggplot(input_data) +
geom_line(aes(x = POL.PROXY, y = POL.PROXY, color = '1-to-1 fit')) +
geom_point(aes(x = POL.PROXY, y = POL.RAW, color = 'raw'), alpha = .5) +
geom_point(aes(x = POL.PROXY, y = POL.PSE, color = 'pse'), alpha = .5) +
geom_point(aes(x = POL.PROXY, y = POL.AQ, color = 'aeroqual'), alpha = .5) +
labs(title = paste(pollutant, '[Proxy] vs [Measured] for', aqy_id, 'month of', month.i),
x = 'Timestamp', y = 'Pollutant Concentration - (ppb)') +
theme(plot.title = element_text(hjust = .5))
print(scatterplot)
}
get_lms <- function(input_data, aqy_id, month.i){
model_raw <- summary(lm('POL.PROXY~POL.RAW', data = input_data))
model_pse <- summary(lm('POL.PROXY~POL.PSE', data = input_data))
model_aq <- summary(lm('POL.PROXY~POL.AQ', data = input_data))
r_sq <- data.frame(cbind(month.i, round(model_raw$r.squared, 5), round(model_pse$r.squared, 5), round(model_aq$r.squared, 5)))
colnames(r_sq) <- c('month', 'raw', 'PSE', 'AQ')
return(r_sq)
}
get_offset_timeseries <- function(input_data, aqy_id, pollutant){
offset_timeseries <- ggplot(input_data) +
geom_line(aes(x = TIMESTAMP, y = POL.OFFSET.PSE, color = 'pse')) +
geom_line(aes(x = TIMESTAMP, y = POL.OFFSET.AQ, color = 'aeroqual')) +
ggtitle(paste(pollutant, 'Offset Over Time for', aqy_id)) +
theme(plot.title = element_text(hjust = .5))
print(offset_timeseries)
}
get_gain_timeseries <- function(input_data, aqy_id, pollutant){
gain_timeseries <- ggplot(input_data) +
geom_line(aes(x = TIMESTAMP, y = POL.GAIN.PSE, color = 'pse')) +
geom_line(aes(x = TIMESTAMP, y = POL.GAIN.AQ, color = 'aeroqual')) +
ggtitle(paste(pollutant, 'Gain Over Time for', aqy_id)) +
theme(plot.title = element_text(hjust = .5))
print(gain_timeseries)
}
get_r2_plot <- function(input_data, aqy_id, pollutant){
r2_plot <- ggplot(input_data) +
geom_line(aes(x = as.numeric(month), y = as.numeric(AQ), color = 'aeroqual')) +
geom_line(aes(x = as.numeric(month), y = as.numeric(PSE), color = 'pse')) +
labs(x = 'Month', y = 'R^2',
title = paste(pollutant, 'Proxy ~ Measured R^2 Over Time for', aqy_id)) +
theme(plot.title = element_text(hjust = .5))
print(r2_plot)
}
Run Performance Tests
months <- c('07', '08', '09', '10', '11', '12')
make_all_plots <- function(input_data, aqy_id, pollutant){
colnames(input_data) <- str_replace_all(colnames(input_data), pollutant, 'POL')
monthly_models <- data.frame()
for(month.i in months){
data_for_month <- filter(input_data, ID == aqy_id & TIME >= paste0('2020-', month.i, '-01 00:00:00') & TIME <= paste0('2020-', month.i, '-31 23:59:59'))
get_timeseries(data_for_month, aqy_id, month.i, pollutant)
get_error_timeseries(data_for_month, aqy_id, month.i, pollutant)
get_scatterplot(data_for_month, aqy_id, month.i, pollutant)
month.i_model <- get_lms(data_for_month, aqy_id, month.i)
monthly_models <- rbind(monthly_models, month.i_model)
}
data_for_deployment <- filter(input_data, ID == aqy_id)
overall_model <- get_lms(data_for_deployment, aqy_id, 'Full Deployment')
all_models <- rbind(overall_model, monthly_models)
print(all_models)
get_r2_plot(monthly_models, aqy_id, pollutant)
get_offset_timeseries(data_for_deployment, aqy_id, pollutant)
get_gain_timeseries(data_for_deployment, aqy_id, pollutant)
}
make_all_plots(O3_data, 'AQY BB-633', 'O3')


















## month raw PSE AQ
## 1 Full Deployment 0.93177 0.92231 0.9088
## 2 07 0.94545 0.94546 0.94545
## 3 08 0.82566 0.81203 0.82563
## 4 09 0.92125 0.92125 0.92125
## 5 10 0.94121 0.94122 0.94121
## 6 11 0.95829 0.95829 0.95829
## 7 12 0.97763 0.97762 0.97763



make_all_plots(O3_data, 'AQY BB-642', 'O3')


















## month raw PSE AQ
## 1 Full Deployment 0.51751 0.84718 0.79159
## 2 07 0.93618 0.93618 0.93618
## 3 08 0.83907 0.86549 0.83903
## 4 09 0.79235 0.90946 0.79235
## 5 10 0.85183 0.84658 0.85183
## 6 11 0.85054 0.85054 0.85053
## 7 12 0.92646 0.92646 0.92646



make_all_plots(NO2_data, 'AQY BB-633', 'NO2')


















## month raw PSE AQ
## 1 Full Deployment 0.00209 0.36109 0.34982
## 2 07 0.08326 0.41048 0.30509
## 3 08 0.07389 0.2281 0.3344
## 4 09 0.01097 0.17064 0.0877
## 5 10 0.01211 0.17098 0.32383
## 6 11 0.00297 0.20912 0.38802
## 7 12 0.01048 0.34544 0.50841



make_all_plots(NO2_data, 'AQY BB-642', 'NO2')


















## month raw PSE AQ
## 1 Full Deployment 0.00131 0.36241 0.18356
## 2 07 0.03021 0.18307 0.22217
## 3 08 0.05849 0.34288 0.35047
## 4 09 2e-04 0.08401 0.06613
## 5 10 0.01849 0.06664 0.05924
## 6 11 0.06217 0.2831 0.2362
## 7 12 0.02605 0.30737 0.30099



MANUAL PLOTS
get_scatter_facet_aqy <- function(in_data, pollutant, aqy_id){
colnames(in_data) <- str_replace_all(colnames(in_data), pollutant, 'POL')
scatter_facet <- in_data %>%
filter(ID == aqy_id) %>%
mutate(MONTH = month(TIMESTAMP, label = T, abbr = F), .after = 'TIMESTAMP') %>%
ggplot() +
geom_line(aes(x = POL.PROXY, y = POL.PROXY, color = '1-to-1 Fit')) +
geom_point(aes(x = POL.PROXY, y = POL.RAW, color = 'Raw'), size = .5, alpha = .5) +
geom_point(aes(x = POL.PROXY, y = POL.AQ, color = 'Aeroqual'), size = .5, alpha = .5) +
geom_point(aes(x = POL.PROXY, y = POL.PSE, color = 'PSE'), size = .5, alpha = .5) +
labs(x = 'Proxy Concentration (ppb)', y = 'Measured Concentration (ppb)', title = paste('Calibrated', pollutant, '& Proxy Values - ', aqy_id)) +
theme(plot.title = element_text(hjust = .5)) +
facet_wrap(~MONTH)
return(scatter_facet)
}
get_scatter_facet_aqy(O3_data, 'O3', 'AQY BB-633')

get_scatter_facet_aqy(O3_data, 'O3', 'AQY BB-642')

get_scatter_facet_aqy(NO2_data, 'NO2', 'AQY BB-633')

get_scatter_facet_aqy(NO2_data, 'NO2', 'AQY BB-642')
